library(HDF5Array)
library(ggplot2)
library(Matrix)
library(pheatmap)
library(RColorBrewer)
library(uwot)
library(grid)
library(gridExtra)
library(GEDI)
dir_data<- "../data_objects/"
dir_data_hdf5<- paste0(dir_data, "COVID19_SCE/")
#' Plot 2D embedding
#'
#' Plot a 2D representation (embedding) of cells
#' @param embedding_mat Embedding
#' @param colour vector of variable to plot
#' @param randomize Logical. Whether to randomize data before plotting.
#'
#' @return ggplot2 object
#' @export
#'
plot_embedding <- function(embedding_mat,colour,randomize=T, size_point=0.05) {
# create a data frame that will have the embedding as well as the colors
embedding_obj <- data.frame(
Dim1=embedding_mat[,1],
Dim2=embedding_mat[,2],
Var=colour )
# randomize the order of the objects
if( randomize ) {
embedding_obj <- embedding_obj[ sample.int(nrow(embedding_obj)), ]
}
# create the plots
if(is.numeric(colour)) # the color variable is numeric
{
#embedding_obj$Var <- embedding_obj$Var - mean(embedding_obj$Var)
lim <- stats::quantile(abs(embedding_obj$Var),0.99)
ggplot2::ggplot(
embedding_obj, ggplot2::aes_string( x="Dim1", y="Dim2", colour="Var"))+
ggplot2::geom_point(size=size_point)+
ggplot2::theme_minimal()+
ggplot2::scale_color_gradientn( limits=c(-lim,lim), colours=c("blue","light grey","red"), oob=scales::squish )
} else {
ggplot2::ggplot(
embedding_obj, ggplot2::aes_string( x="Dim1", y="Dim2", colour="Var"))+
ggplot2::geom_point(size=size_point)+
ggplot2::theme_minimal()+
ggplot2::guides(colour=ggplot2::guide_legend(override.aes=list(size=3)))
}
}
# Reading SCE object
sce<- loadHDF5SummarizedExperiment(dir=dir_data_hdf5)
Loading required package: SingleCellExperiment
Loading required package: SummarizedExperiment
Loading required package: GenomicRanges
Loading required package: GenomeInfoDb
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with
'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
Attaching package: 'Biobase'
The following object is masked from 'package:MatrixGenerics':
rowMedians
The following objects are masked from 'package:matrixStats':
anyMissing, rowMedians
meta<- data.frame(colData(sce))
# Reading GEDI model ( cohort1 results)
model<- readRDS(paste0(dir_data, "COVID19_gedi_model_cohort1.rds"))
# reorder meta based on GEDI order
meta<- meta[model$aux$cellIDs,]
t( model$aux$inputH)
(Intercept) group_per_samplemild group_per_samplesevere
C19-CB-0001 1 1 0
C19-CB-0003 1 1 0
C19-CB-0002 1 1 0
C19-CB-0005 1 1 0
C19-CB-0009 1 0 1
C19-CB-0012 1 0 1
C19-CB-0013 1 0 1
C19-CB-0011 1 0 1
C19-CB-0008 1 0 1
C19-CB-0020 1 0 1
C19-CB-0021 1 0 1
C19-CB-0016 1 0 1
C19-CB-0198 1 0 1
C19-CB-0204 1 1 0
C19-CB-0199 1 0 1
C19-CB-0214 1 1 0
C19-CB-0053 1 1 0
C19-CB-0052 1 1 0
P18F 1 0 0
P17H 1 0 0
P20H 1 0 0
P15F 1 0 0
P08H 1 0 0
P13H 1 0 0
P07H 1 0 0
P06F 1 0 0
P04H 1 0 0
C2P01H 1 0 0
P09H 1 0 0
P02H 1 0 0
C2P05F 1 0 0
C2P07H 1 0 0
C2P13F 1 0 0
C2P16H 1 0 0
C2P10H 1 0 0
C2P19H 1 0 0
C2P15H 1 0 0
one_k_v3 1 0 0
Five_k_v3 1 0 0
Ten_k_v3 1 0 0
# Project the vector field on the SVD space
vectorField <- svd.vectorField.gedi(
model, start.cond = c(1,0,0), end.cond = c(1,0,1), scale_cond_vector=0.3 )
# The vectorField list contains the SVD results:
# The 'u' object contains the gene loadings (JxK)
# The 'd' object is the singular values
# The 'v' object is a 2N x K matrix, with the first N rows containing the
# start coordinates, and the second N rows containing the end coordinates
# Getting overall velocity
DiffExp <- getDiffExp.gedi( model, c(0,0,1) )
velocity<- colSums(DiffExp^2)
# Euclidean distance
set.seed(43)
umap_vectorField <- umap(
vectorField$v %*% diag(vectorField$d), min_dist=0.5,
metric="euclidean")
Embedding plots
## Cell type embedding indices
ggp<- plot_embedding( umap_vectorField[vectorField$embedding_indices,], meta$id.celltype) +
theme_void() +
theme(legend.position ="none")
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
Warning: Please use tidy evaluation idioms with `aes()`.
Warning: See also `vignette("ggplot2-in-packages")` for more information.
ggp
## Saving the colors
g <- ggplot_build(ggp)
df<- g$data[[1]]
df<- unique(df[,c("group", "colour")])
df<- df[order(df$group),]
temp_vec<- levels(meta$id.celltype)
temp_vec<- temp_vec[temp_vec %in% unique(meta$id.celltype)]
df$celltype<- temp_vec
vec_colors<- df$colour
names(vec_colors)<- df$celltype
ggp<- plot_embedding( umap_vectorField[vectorField$embedding_indices,], meta$id.celltype) +
theme_void() +
theme(legend.position ="right")
legend <- cowplot::get_legend(ggp)
grid.newpage()
grid.draw(legend)
## Donor embedding indices
plot_embedding( umap_vectorField[vectorField$embedding_indices,], meta$donor) +
theme_void() +
theme(legend.position ="none")
ggp<- plot_embedding( umap_vectorField[vectorField$embedding_indices,], meta$donor) +
theme_void() +
theme(legend.position ="right")
legend <- cowplot::get_legend(ggp)
grid.newpage()
grid.draw(legend)
Vector field plots
## Cell type
plot_vectorField( umap_vectorField, meta$id.celltype, nbin=50, minNum=30 ) +
theme_void() +
theme(legend.position ="none") +
scale_color_manual(values=vec_colors)
ggp<- plot_vectorField( umap_vectorField, meta$id.celltype, nbin=50, minNum=30 ) +
theme_void() +
theme(legend.position ="right") +
scale_color_manual(values=vec_colors )
legend <- cowplot::get_legend(ggp)
grid.newpage()
grid.draw(legend)
Boxplot of the magnitude of the gene expression changes
meta$velocity_severe<- velocity
## limit for boxplot
lim <- stats::quantile(abs(meta$velocity_severe),0.999)
meta$id.celltype<- factor(meta$id.celltype, levels=rev(levels(meta$id.celltype) ) )
ggp<- ggplot(meta, aes(velocity_severe,id.celltype, color = id.celltype ) ) +
geom_boxplot(outlier.shape = NA) +
xlim(0, lim) +
scale_color_manual(values=vec_colors) +
theme_minimal() +
theme(legend.position ="none")
ggp
Warning: Removed 87 rows containing non-finite values (`stat_boxplot()`).
sessionInfo()
R version 4.0.0 (2020-04-24)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)
Matrix products: default
BLAS/LAPACK: /cvmfs/soft.computecanada.ca/easybuild/software/2020/Core/imkl/2020.1.217/compilers_and_libraries_2020.1.217/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
locale:
[1] LC_CTYPE=en_CA.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_CA.UTF-8 LC_COLLATE=en_CA.UTF-8
[5] LC_MONETARY=en_CA.UTF-8 LC_MESSAGES=en_CA.UTF-8
[7] LC_PAPER=en_CA.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid parallel stats4 stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] SingleCellExperiment_1.12.0 SummarizedExperiment_1.20.0
[3] Biobase_2.50.0 GenomicRanges_1.42.0
[5] GenomeInfoDb_1.26.7 GEDI_0.0.0.9000
[7] gridExtra_2.3 uwot_0.1.10
[9] RColorBrewer_1.1-2 pheatmap_1.0.12
[11] ggplot2_3.4.2 HDF5Array_1.18.1
[13] rhdf5_2.34.0 DelayedArray_0.16.3
[15] IRanges_2.24.1 S4Vectors_0.28.1
[17] MatrixGenerics_1.2.1 matrixStats_0.58.0
[19] BiocGenerics_0.36.0 Matrix_1.3-3
loaded via a namespace (and not attached):
[1] Rcpp_1.0.8.3 lattice_0.20-41 digest_0.6.27
[4] utf8_1.2.1 RSpectra_0.16-0 plyr_1.8.6
[7] R6_2.5.0 backports_1.2.1 evaluate_0.14
[10] highr_0.8 pillar_1.8.1 zlibbioc_1.36.0
[13] rlang_1.1.0 data.table_1.14.0 jquerylib_0.1.3
[16] checkmate_2.1.0 rmarkdown_2.7 labeling_0.4.2
[19] stringr_1.5.0 RcppEigen_0.3.3.9.1 RCurl_1.98-1.3
[22] munsell_0.5.0 metR_0.13.0 compiler_4.0.0
[25] xfun_0.22 pkgconfig_2.0.3 htmltools_0.5.1.1
[28] tidyselect_1.2.0 tibble_3.2.1 GenomeInfoDbData_1.2.4
[31] codetools_0.2-16 fansi_0.4.2 dplyr_1.1.1
[34] withr_2.5.0 bitops_1.0-6 rhdf5filters_1.2.0
[37] jsonlite_1.8.4 gtable_0.3.0 lifecycle_1.0.3
[40] magrittr_2.0.3 scales_1.2.1 cachem_1.0.4
[43] cli_3.6.1 stringi_1.5.3 farver_2.1.0
[46] XVector_0.30.0 bslib_0.2.4 generics_0.1.3
[49] vctrs_0.6.1 cowplot_1.1.1 RcppAnnoy_0.0.18
[52] Rhdf5lib_1.12.1 tools_4.0.0 glue_1.6.2
[55] fastmap_1.1.0 yaml_2.2.1 colorspace_2.0-0
[58] memoise_2.0.0 knitr_1.31 sass_0.3.1